Do Immigrants Commit More Crime?

Hasan Merchant, Jessi Ji, Joshua Cheng, Rajan Hulait

Research Question

  • Primary Question: How has immigration changed across Canada from 2016 to 2021, and what are its social effects on crime?
  • Objective: To use immigration data from the 2016 and 2021 Canadian censuses to understand changes in crime rates and crime severity
  • Focus: Determine which provinces saw the greatest shifts in immigration, then examine the provinces with increasing crime severity to determine the relationship between the two

Motivation

3rd-generation Canadian

Visualization: Change in Average Severe Crime Ratio by Province

Visualization: Immigration Change by Province

Conclusion

The northern territories, the Maritime provinces, and Manitoba have the highest Crime Severity Index (CSI).

  • Population: Sparse populations and geographic isolation can lead to fewer policing resources and longer response times.

  • Historical and Systemic Issues: Indigenous communities in some of these areas may face systemic inequities, historical trauma, and lack of support, which are factors that correlate with higher crime rates.

Appendix

Data Extraction and Cleaning

library(tidyverse)
library(cancensus) # We get an error message when we load cancensus because we haven't loaded the API key yet
library(ggrepel)
library(sf)
library(geojsonsf)
library(ggthemes)
library(plotly)
library(ggplot2)

# The CensusMapper API key is a credential and must not live in this file.
# cancensus picks the key up from the CM_API_KEY environment variable (or the
# legacy `cancensus.api_key` option), so store it once per machine, outside of
# the analysis, with
#   cancensus::set_cancensus_api_key("<your key>", install = TRUE)
# and restart R. Here we only verify that a key is available and fail early
# with instructions if it is not, instead of rewriting .Renviron on every run.
# NOTE(review): any key that was previously committed in this file should be
# regenerated on censusmapper.ca.
census_api_key <- Sys.getenv("CM_API_KEY")
if (!nzchar(census_api_key)) {
  census_api_key <- getOption("cancensus.api_key", default = "")
}
if (!nzchar(census_api_key)) {
  stop(
    "No CensusMapper API key found. Run ",
    "cancensus::set_cancensus_api_key(\"<your key>\", install = TRUE) ",
    "once, restart R, and re-run this script.",
    call. = FALSE
  )
}

# Provinces and territories to download, given as two-digit Statistics Canada
# region codes. Both census pulls share this one vector so they always cover
# the same regions: the later join starts from the 2016 table, so any region
# missing from the 2016 request (e.g. "11", Prince Edward Island) would
# silently disappear from the joined data and from the map.
province_ids <- c(
  "10", "11", "12", "13", "24", "35", "46", "47", "48", "59", "60", "61", "62"
)

# 2016 census: immigrants, total population and population density per
# province. geo_format = "sf" attaches the province geometries that the
# choropleth map is drawn from.
census_data2016 <- get_census(
  dataset = "CA16",
  regions = list(PR = province_ids),
  vectors = c("v_CA16_3411", "v_CA16_401", "v_CA16_406"),
  labels = "detailed",
  geo_format = "sf",
  level = "PR"
)

# 2021 census: the same three measures. geo_format = NA returns a plain table
# without geometry, which is enough here because the shapes come from the 2016
# pull; switch it to "sf" if 2021 boundaries are needed for spatial plots.
census_data2021 <- get_census(
  dataset = "CA21",
  regions = list(PR = province_ids),
  vectors = c("v_CA21_1", "v_CA21_4410", "v_CA21_6"),
  labels = "detailed",
  geo_format = NA,
  level = "PR"
)


# Reduce each census pull to the columns the analysis uses: the number of
# immigrants, the immigrant share of the total population, the region name and
# the GeoUID join key. Regions without an immigrant count are dropped.
#
# The census columns are wrapped in backticks (not quotes) because their
# "detailed" labels contain spaces and punctuation; backticks tell R to treat
# the whole label as a single column name.
census_data2021_2 <- census_data2021 %>%
  filter(!is.na(`v_CA21_4410: Immigrants`)) %>%
  mutate(
    immig.pop.2021 = `v_CA21_4410: Immigrants`,
    immig.rate.2021 = immig.pop.2021 / `v_CA21_1: Population, 2021`,
    name = `Region Name`
  ) %>%
  select(immig.rate.2021, immig.pop.2021, name, GeoUID)

# Same reduction for 2016. This table was fetched with geometry, and the
# geometry column stays attached through filter/mutate/select.
census_data2016_2 <- census_data2016 %>%
  filter(!is.na(`v_CA16_3411: Immigrants`)) %>%
  mutate(
    immig.pop.2016 = `v_CA16_3411: Immigrants`,
    immig.rate.2016 = immig.pop.2016 / `v_CA16_401: Population, 2016`,
    name = `Region Name`
  ) %>%
  select(immig.rate.2016, immig.pop.2016, name, GeoUID)

# Line the two census years up region by region. The 2016 table is on the
# left, so its rows (and its geometry) define the result; `name` exists in
# both inputs and therefore comes out as name.x (2016) and name.y (2021).
census_by_region <- left_join(
  census_data2016_2,
  census_data2021_2,
  by = "GeoUID"
)

# Change in the immigrant share of the population between 2016 and 2021.
joindata <- census_by_region %>%
  mutate(change = immig.rate.2021 - immig.rate.2016)

# Change in the absolute number of immigrants between 2016 and 2021.
joindata2 <- census_by_region %>%
  mutate(change = immig.pop.2021 - immig.pop.2016)

Appendix

First Visualization:

# First plot: average change in crime, one bar per region.
# Keep one row per (GEO, AVERAGE_CHANGE) combination. distinct() does this
# directly and returns an ungrouped tibble, unlike group_by() + summarise().
stat.crim.data <- read_csv("crimedatafinal.csv") %>%
  distinct(GEO, AVERAGE_CHANGE)

# Mean across regions, drawn as the dashed reference line. na.rm = TRUE keeps
# one missing value from turning the mean (and the line) into NA.
mean_csi <- mean(stat.crim.data$AVERAGE_CHANGE, na.rm = TRUE)

ggplot(
  data = stat.crim.data,
  mapping = aes(
    x = reorder(GEO, AVERAGE_CHANGE), # bars sorted by their value
    y = AVERAGE_CHANGE,
    fill = AVERAGE_CHANGE
  )
) +
  # Outline width is set with `linewidth`; `size` is deprecated for lines
  # since ggplot2 3.4.0.
  geom_col(color = "black", linewidth = 0.3) +
  geom_hline(yintercept = mean_csi, linetype = "dashed", color = "#1C1C1A") +
  coord_flip() +
  scale_fill_gradient(
    "Crime Ratio",
    low = "#D9E3DA", # Light green for the smallest values
    high = "#465A4A", # Dark green for the largest values
    labels = scales::number
  ) +
  labs(
    title = "Average Change in Crime by Region",
    x = "Province",
    y = "Average Crime Change (%)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "#F0F1EA", color = NA),
    panel.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.text = element_text(color = "#1C1C1A"),
    legend.title = element_text(color = "#1C1C1A"),
    plot.title = element_text(color = "#1C1C1A", hjust = 0.5, size = 14),
    plot.subtitle = element_text(color = "#1C1C1A", hjust = 0.5, size = 10)
  )

Appendix

Second Visualization:

# Second plot: choropleth of the change in the number of immigrants between
# the 2016 and 2021 censuses, one polygon per province/territory.
p <- joindata2 %>%
  ggplot() +
  geom_sf(mapping = aes(fill = change)) +
  # name.x is the region name from the 2016 side of the join.
  geom_sf_label(aes(label = name.x), size = 1.7) +
  scale_fill_gradient(
    "Immigration",
    low = "#D9E3DA", # Light green for low immigration
    high = "#465A4A", # Dark green for high immigration
    labels = scales::number
  ) +
  labs(
    title = "Change in Immigration",
    # The data is fetched at the province level, not census subdivisions.
    subtitle = "Canada Provinces and Territories, 2016-2021 Census"
  ) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "#F0F1EA", color = NA),
    panel.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.text = element_text(color = "#1C1C1A"),
    legend.title = element_text(color = "#1C1C1A"),
    plot.title = element_text(color = "#1C1C1A", hjust = 0.5, size = 14),
    plot.subtitle = element_text(color = "#1C1C1A", hjust = 0.5, size = 10),
    panel.grid = element_blank(),
    # The label layer adds x/y position aesthetics, which would otherwise
    # show up as stray "x" and "y" axis titles on the map.
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Interactive version of the map.
ggplotly(p)

Appendix

Third Visualization:

# Scatter plot: 2021 immigrant share vs. crime value, one point per region.

# 2021 crime rows, one per region. GeoUID is converted to character so it
# matches the type of the census GeoUID used as the join key.
stat.crim.data2 <- read_csv("crimedatafinal.csv") %>%
  filter(REF_DATE == "2021") %>%
  mutate(GeoUID = as.character(GeoUID)) %>%
  distinct(GeoUID, GEO, VALUE)

# 2021 immigrant share per region, rescaled from a proportion to a percentage.
joindata3 <- joindata2 %>%
  mutate(GeoUID = as.character(GeoUID)) %>%
  mutate(immig.rate.2021 = immig.rate.2021 * 100) %>%
  select(GeoUID, immig.rate.2021)

scat_plot <- left_join(joindata3, stat.crim.data2, by = "GeoUID")

# The left join leaves VALUE as NA for any region without a crime row, so the
# correlation and the annotation height are computed on complete pairs only;
# otherwise a single unmatched region makes both NA.
cor_value <- cor(
  scat_plot$immig.rate.2021,
  scat_plot$VALUE,
  use = "complete.obs"
)
annotation_y <- max(scat_plot$VALUE, na.rm = TRUE)

ggplot(data = scat_plot, mapping = aes(x = immig.rate.2021, y = VALUE)) +
  geom_point(alpha = 0.5, color = "#383D3D") +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = "#465A4A"
  ) +
  geom_text_repel(aes(label = GEO), size = 2.5, color = "#383D3D") +
  annotate(
    "text",
    x = 24,
    y = annotation_y,
    label = paste("Correlation: ", round(cor_value, 2)),
    color = "#383D3D",
    size = 5,
    hjust = 0
  ) +
  # No percent formatter on the y axis: the plot labels it as CSI, an index
  # rather than a proportion, and the axis text is hidden below anyway.
  labs(
    title = "Correlation Between Immigration by Province(10y Average) and Crime Severity Index",
    x = "Immigration",
    y = "CSI"
  ) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "#F0F1EA", color = NA),
    panel.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.background = element_rect(fill = "#F0F1EA", color = NA),
    legend.text = element_text(color = "#1C1C1A"),
    legend.title = element_text(color = "#1C1C1A"),
    plot.title = element_text(color = "#1C1C1A", hjust = 0.5, size = 14),
    plot.subtitle = element_text(color = "#1C1C1A", hjust = 0.5, size = 10),
    axis.text = element_blank()
  )